from __future__ import unicode_literals

import sys

from prettytable import PrettyTable
import time

from bs4 import BeautifulSoup

'''
Third-party dependencies (this script imports both bs4 and prettytable):
sudo pip3 install bs4 prettytable
'''

# Script body: parse a saved HTML listing page, report how long the parse
# took, then print every listed house both line by line and as a PrettyTable.
now = time.time()  # seconds
print(now)
html_file = '/Users/ddy/ziroom-20180809093102-1533778262435.html'

# Hand BeautifulSoup the raw bytes so it detects the document's encoding
# itself instead of relying on the platform's default text encoding, which is
# not UTF-8 everywhere and can fail or garble non-ASCII page content.
with open(html_file, 'rb') as content:
    soup = BeautifulSoup(content, 'html.parser')

# NOTE(review): 'nomsgs' is presumably the "no results" marker - confirm.
print(soup.find(class_='nomsgs') is not None)
now_1 = time.time()
print(now_1)
# Wall-clock seconds spent reading and parsing the file plus the lookup above.
cost = now_1 - now
print("cost time: {} s".format(cost))

# A page without the #houseList container produces an empty table instead of
# an AttributeError on None.
house_list = soup.find(id='houseList')
li_house = house_list.find_all('li') if house_list is not None else []

table = PrettyTable(['name', 'balcony', 'area', 'floor', 'rooms', 'location'], encoding=sys.stdout.encoding)
for house in li_house:
    heading = house.find('h3')
    title_links = heading.select('a.t1') if heading is not None else []
    if not title_links:
        # find_all('li') is recursive and may return <li> elements that carry
        # no title link; those cannot be rendered as a listing, so skip them.
        continue
    title = title_links[0].get_text()

    # The balcony marker is optional; an absent one is shown as an empty cell.
    balcony_span = house.find(class_='balcony')
    balcony = balcony_span.get_text() if balcony_span is not None else ''

    # The first four <span> elements of the detail block fill the area, floor,
    # rooms and location columns, in that order. Missing spans are padded with
    # empty strings so an incomplete entry still produces a full-width row.
    detail_div = house.find(class_='detail')
    detail_items = detail_div.select('span') if detail_div is not None else []
    details = [span.get_text() for span in detail_items[:4]]
    details += [''] * (4 - len(details))

    # The table shows only the second '-'-separated segment of the title;
    # fall back to the whole title when it contains no '-'.
    title_parts = title.split('-')
    name = title_parts[1] if len(title_parts) > 1 else title

    table.add_row([name, balcony] + details)
    # Per-house progress line: padded full title and balcony text.
    print('{name:<20}, {balcony:<10}, '.format(name=title, balcony=balcony))
print(table)
